#!/usr/bin/env bash
## 1. Select the MHB region of interest from the per-sample mHap files and
##    merge it into one sorted, tabix-indexed mHap file per group.
#
# Usage: <this script> [REGION]
#   REGION  region string passed to tabix
#           (default: chr1:119,527,091-119,527,476)
#
# Outputs, written to the current directory (each with its tabix index):
#   cancer_merged_<tag>.mhap.gz  built from ${mhap_T}/*_cancer.mhap.gz
#   normal_merged_<tag>.mhap.gz  built from ${mhap_N}/<tissue>.mhap.gz
# where <tag> is REGION with every ':' and '-' replaced by '_'.
#
# bash is required: the ${var//pattern/replacement} expansion used for <tag>
# is not available in plain /bin/sh.
set -euo pipefail

region=${1:-"chr1:119,527,091-119,527,476"}
mhap_T="/sibcb1/bioinformatics/dataupload/cancermhaps/mHap/in_house"
mhap_N="/sibcb1/bioinformatics/dataupload/cancermhaps/mHap/public/normal"
# Filename-safe form of the region, e.g. chr1_119,527,091_119,527,476
region_tag=${region//[:-]/_}

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Extract ${region} from each input mHap file, sort the records by column 1
# (version sort) and column 2 (numeric), bgzip the result and index it.
# Records are streamed straight into sort, so no per-sample intermediate
# files are created and stale files in the working directory cannot leak
# into the merge.
# Globals:   region (read)
# Arguments: $1 - output .mhap.gz path; $2... - bgzipped, indexed mHap inputs
#######################################
merge_region() {
  local merged=$1
  shift
  local src
  for src in "$@"; do
    # errexit is suspended on the left side of '||' below, so fail explicitly;
    # 'exit' only leaves the pipeline subshell and pipefail propagates it.
    tabix "$src" "$region" || exit 1
  done | sort -k1,1V -k2,2n | bgzip > "$merged" \
    || die "failed to build ${merged}"
  # -f: overwrite an index left by a previous run instead of aborting.
  tabix -f -s 1 -b 2 -e 3 "$merged" || die "failed to index ${merged}"
}

command -v tabix > /dev/null || die "tabix not found on PATH"
command -v bgzip > /dev/null || die "bgzip not found on PATH"

#### Tumor
# Only *_cancer.mhap.gz files ever contributed to the merged tumor file, so
# only those are queried. Glob directly instead of parsing `ls`.
shopt -s nullglob
tumor_files=("${mhap_T}"/*_cancer.mhap.gz)
shopt -u nullglob
((${#tumor_files[@]} > 0)) || die "no *_cancer.mhap.gz files in ${mhap_T}"
merge_region "cancer_merged_${region_tag}.mhap.gz" "${tumor_files[@]}"

#### Normal
normal_files=()
for tissue in breast colon esophagus head_and_neck liver lung ovary \
  pancreas stomach thyroid; do
  [[ -r "${mhap_N}/${tissue}.mhap.gz" ]] \
    || die "missing normal mHap file: ${mhap_N}/${tissue}.mhap.gz"
  normal_files+=("${mhap_N}/${tissue}.mhap.gz")
done
merge_region "normal_merged_${region_tag}.mhap.gz" "${normal_files[@]}"

# The results were generated by the R script Kmer_Normalization.R.
# Figure 1: K4-mer of Tumor vs. Normal
# Early cancer prediction using cfDNA methylation data was performed with the Python scripts mHapSummary_XHL.py, Matrix_xHL.py, and ROC_cross_validation.py. The results are as follows:
# Figure 2: MHL- and Beta-based prediction for early cancer detection from plasma DNA using a random forest model